source("../modularity_analysis_functions.R", local = knitr::knit_global())
# reading the microbiome data
# working only with Rattus from the three villages

# villages: Andatsakala, Mandena, Sarahandrano
data_asv <- read_csv("../../data/data_processed/microbiome/data_asv_rra0.001_p0.01_th5000_all.csv")

1 Exploration

# small mammals data
small_mammals <- read_csv("../../data/data_raw/data_small_mammals/Terrestrial_Mammals.csv") %>% 
  mutate(host_ID = as.numeric(gsub(".*?([0-9]+).*", "\\1", animal_id))) %>% 
  mutate(age_repro = as_factor(age_repro)) %>% 
  dplyr::rename(grid = habitat_type) %>% 
  filter(host_ID %in% data_asv$host_ID)

cat("## Abundance", '\n','\n')

1.1 Abundance

g <- small_mammals %>% 
  count(village,grid) %>% 
  ggplot(aes(x=grid, y=n)) +
  geom_bar(position="dodge", stat="identity") +
  theme_bw() +
  theme(axis.text = element_text(size = 10, color = 'black', angle = 90, vjust = 0.5, hjust=1), title = element_text(size = 14)) +
  labs(x="Land Use", y="Small-Mammals Abundance")
print(g)

cat("## Sex ratio", '\n','\n')

1.2 Sex ratio

g <- small_mammals %>% 
  count(grid, sex) %>% 
  ggplot(aes(fill=sex, x=grid, y=n)) +
  geom_bar(position="fill", stat="identity") +
  geom_hline(yintercept = 0.5, linetype = "dashed") +
  theme_bw() +
  theme(axis.text = element_text(size = 10, color = 'black', angle = 90, vjust = 0.5, hjust=1), title = element_text(size = 14)) +
  labs(x="Land Use", y="Proportion")
print(g)

cat("## Age ratio", '\n','\n')

1.3 Age ratio

g <- small_mammals %>% 
  count(grid, age_repro) %>% 
  ggplot(aes(fill=age_repro, x=grid, y=n)) +
  geom_bar(position="fill", stat="identity") +
  theme_bw() +
  theme(axis.text = element_text(size = 10, color = 'black', angle = 90, vjust = 0.5, hjust=1), title = element_text(size = 14)) +
  labs(x="Land Use", y="Proportion")
print(g)

# relative abundance of the 10 most abundant Family of each group

# microbes taxonomy
tax <- read_delim("../../data/data_raw/data_microbiome/ASVs_taxonomy_new.tsv") %>% 
  dplyr::rename(asv_ID = ASV)

data_asv_tax <- data_asv %>% 
  left_join(tax, by="asv_ID")

cat("## Relative abundance Family", '\n','\n')

1.4 Relative abundance Family

total_reads_groups <- data_asv_tax %>% distinct(host_ID, asv_core, total_reads) %>% group_by(asv_core) %>% summarise(n_total = sum(total_reads))
most_abu_family <- data_asv_tax %>%
  mutate(reads_a = reads*total_reads) %>%
  group_by(asv_core, Family) %>%
  summarise(n= sum(reads_a)) %>%
  left_join(total_reads_groups, by="asv_core") %>%
  mutate(n_p = n/n_total) 

  most_abu_family_8 <- most_abu_family %>% 
    filter(!is.na(Family)) %>% 
    ungroup() %>% 
    slice_max(by = asv_core, order_by = n_p, n = 6) %>% 
    mutate(p = "top") %>% 
    select(asv_core, Family, p)
  
  most_abu_family_p <- data_asv_tax %>%
  mutate(reads_a = reads*total_reads) %>%
  group_by(asv_core, Family, grid) %>%
  summarise(n= sum(reads_a)) %>%
  left_join(total_reads_groups, by="asv_core") %>%
  mutate(n_p = n/n_total) %>% 
    left_join(most_abu_family_8, by=c("asv_core","Family")) %>% 
    mutate(p = case_when(p=="top" ~ Family,
                         is.na(Family) ~ ".NA",
                         is.na(p) ~ ".Other"))
library("RColorBrewer")
colors <- c("grey40","grey80","darkblue","#ffd60a",brewer.pal(n = 12, name = "Paired"))
  g <- most_abu_family_p %>% 
    group_by(asv_core,grid,  p) %>% 
    summarise(n_p = sum(n_p)) %>% 
ggplot(aes(fill=p, x=grid, y=n_p)) +
  geom_bar(position="fill", stat="identity") +
  facet_wrap(~asv_core) +
  theme_bw() +
  theme(axis.text = element_text(size = 10, color = 'black', angle = 90, vjust = 0.5, hjust=1), title = element_text(size = 14),
        strip.text = element_text(size=12, color = 'black'), 
        panel.grid = element_blank(), panel.background = element_rect(colour = "black"), legend.key.size = unit(0.5, "cm")) +
  scale_fill_manual(values=colors) +
  labs(x="Land Use", y="Relative Abundance", fill = "Family")
print(g)

cat('\n','\n')

2 PERMANOVA

2.1 Bray-Curtis

set.seed(123)
data_asv_mat <- data_asv %>% 
  #filter(asv_core == "Core") %>% 
  select(host_ID, asv_ID, reads) %>% 
  spread(asv_ID, reads, fill = 0) %>% 
  column_to_rownames("host_ID") %>% 
  as.matrix()

hosts <- small_mammals %>% 
  filter(host_ID %in% rownames(data_asv_mat)) %>% 
  select(host_ID,village,grid,season,month,mass, sex,age_repro,age_dental,elevation.obs) %>% 
  mutate(village=as.factor(village),grid=as.factor(grid), season=as.factor(season),month=as.factor(month), sex=as.factor(sex), age_dental=as.factor(age_dental))

distance_matrix <- vegdist(data_asv_mat, method = "bray")

# Perform PERMANOVA
permanova_result <- adonis2(distance_matrix ~ grid+season+village, data = hosts, permutations = 999)
print(knitr::kable(permanova_result[,4:5]))
F Pr(>F)
grid 2.331399 0.001
season 4.768973 0.001
village 5.666093 0.001
Residual NA NA
Total NA NA
cat('\n')
# PERMANOVA post-hoc
# perm_post_grid <- RVAideMemoire::pairwise.perm.manova(distance_matrix, fact = hosts$grid, 
#     test = "bonferroni", nperm = 999, progress = FALSE)$p.value %>% as.data.frame()
# print(knitr::kable(perm_post_grid))
# cat('\n')

#NMDS
nmds_result <- metaMDS(distance_matrix, distance = "bray", k = 2, trace = F)

# preparing data for plotting
nmds_plot <- nmds_result$points %>% 
  as.data.frame() %>%  
  rownames_to_column("host_ID") %>% 
  mutate(host_ID = as.double(host_ID)) %>% 
  left_join(hosts, by="host_ID")

# plotting
g1 <- nmds_plot %>% 
  ggplot( aes(MDS1, MDS2, color=grid, shape=village)) +
  geom_point(size = 2, position=position_jitter(.01)) +
  #stat_ellipse(aes(fill=grid), alpha=.1, type='norm',linetype =2, geom="polygon") + ##draws 95% confidence interval ellipses
  theme_bw() +
  annotate("text", x=min(nmds_plot$MDS1)+0.04, size=3, y=max(nmds_plot$MDS2), label=paste('Stress =',round(nmds_result$stress,3))) +
  labs(x = "NMDS1", y = "NMDS2", title = "(A) Bray-Curtis")
print(g1)

2.2 UniFrac

set.seed(123)
library(GUniFrac)

rooted_phylo_tree <- phangorn::midpoint(phylo_tree)
distance_matrix <- GUniFrac(data_asv_mat, rooted_phylo_tree, verbose = F)$unifracs[, , "d_UW"]

# Perform weighted UniFrac
unifrac_result <- adonis2(distance_matrix ~ grid+season+village, data = hosts, permutations = 999)
print(knitr::kable(unifrac_result[,4:5]))
F Pr(>F)
grid 2.644989 0.001
season 6.105313 0.001
village 8.450150 0.001
Residual NA NA
Total NA NA
cat('\n')
# PERMANOVA post-hoc
# perm_post_grid <- RVAideMemoire::pairwise.perm.manova(distance_matrix, fact = hosts$grid, 
#     test = "bonferroni", nperm = 999, progress = FALSE)$p.value %>% as.data.frame()
# print(knitr::kable(perm_post_grid))
# cat('\n')

#NMDS
nmds_result2 <- metaMDS(distance_matrix, k = 2, trace = F)

# preparing data for plotting
nmds_plot2 <- nmds_result2$points %>% 
  as.data.frame() %>%  
  rownames_to_column("host_ID") %>% 
  mutate(host_ID = as.double(host_ID)) %>% 
  left_join(hosts, by="host_ID")

# plotting
g2 <- nmds_plot2 %>% 
  ggplot( aes(MDS1, MDS2, color=grid, shape=season)) +
  geom_point(size = 2, position=position_jitter(.05)) +
  #stat_ellipse(aes(fill=grid), alpha=.1, type='norm',linetype =2, geom="polygon") + ##draws 95% confidence interval ellipses
  theme_bw() +
  annotate("text", x=min(nmds_plot2$MDS1)+0.15, size=3, y=max(nmds_plot2$MDS2), label=paste('Stress =',round(nmds_result2$stress,3))) +
  labs(x = "NMDS1", y = "NMDS2", title = "(B) Weighted UniFrac")

2.3 Figure

# plotting
g1 + g2 + plot_layout(guides='collect') &
  theme(legend.position='bottom', legend.spacing.x=unit(0.1, 'cm'))

cat('\n','\n')

3 Microbial Groups

3.1 Genus

# taking only ASVs with known Genus
data_tax <- data_asv_tax %>% 
  distinct(host_ID, asv_core, Genus) %>% 
  filter(!is.na(Genus))

# known ASVs
asv_genus <- data_asv_tax %>% 
  distinct(asv_ID, Genus) %>%
  mutate(name = is.na(Genus)) 
  print(paste("Percentege of known ASVs:", round(table(asv_genus$name)[1]/nrow(asv_genus)*100, 2)))

[1] “Percentege of known ASVs: 41.26”

  cat('\n')
# how many Genus?
print(paste("Number of genera:", length(unique(data_tax$Genus))))

[1] “Number of genera: 119”

cat('\n')
# making unique samples (host_ID+asv_core)
data_tax_meta <- data_tax %>% 
  arrange(host_ID) %>% 
  unite(col="sample", host_ID:asv_core, remove = FALSE)
  
data_tax_meta2 <- data_tax_meta %>% distinct(sample, asv_core) 

data_tax_summary <- data_tax %>% 
  count(asv_core, Genus) %>% 
  spread(Genus, n, fill = 0)

# transforming to matrix
data_tax_mat <- data_tax_meta %>% 
  select(sample, Genus) %>% 
  mutate(n=1) %>% 
  spread(Genus, n, fill = 0) %>% 
  column_to_rownames("sample") %>% 
  as.matrix()

# Perform PERMANOVA
permanova_result <- adonis2(data_tax_mat ~ asv_core, data = data_tax_meta2, method = "jaccard", permutations = 999)
print(paste("F =", permanova_result$F[1]))

[1] “F = 155.763265711577”

cat('\n')
print(paste("p-value =", permanova_result$`Pr(>F)`[1]))

[1] “p-value = 0.001”

cat('\n')
# PCA
tax_pca <- prcomp(data_tax_mat)

# explained variance
ex_var <- tax_pca$sdev ^2 
prop_ex_var <- ex_var/sum(ex_var)*100

loadings <- as.data.frame(tax_pca$rotation[, 1:2]) %>% rownames_to_column("Genus") %>% 
  mutate(PC1_abs = abs(PC1), PC2_abs = abs(PC2))

tax_scores <- as.data.frame(tax_pca$x[,1:2]) %>% rownames_to_column("sample") %>% 
  left_join(data_tax_meta2, by="sample") 

loading_top <- loadings %>% 
  slice_max(n = 5, order_by = PC1_abs) %>% 
  bind_rows(loadings %>% slice_max(n = 5, order_by = PC2_abs)) 

g1 <- tax_scores %>% 
  ggplot(aes(PC1, PC2, color=asv_core)) +
  geom_point() +
  geom_segment(data = loading_top, inherit.aes = FALSE, aes(x = 0, y = 0, xend = PC1*6, yend = PC2*6), 
                 arrow = arrow(length = unit(0.15, "cm"))) +
  annotate("label", x = (loading_top$PC1*6), y = (loading_top$PC2*6),
     label = loading_top$Genus, size=3, label.size=0.1, label.padding=unit(0.05, "lines")) +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", size=1))+
  scale_fill_manual(values=group.colors) +
  labs(x = paste("PC1 (",round(prop_ex_var[1],2),"%)", sep = ""), y = paste("PC2 (",round(prop_ex_var[2],2),"%)", sep = ""), 
       title = "(A) Genus", color = "ASV Group")

3.2 Family

# taking only ASVs with known Family
data_tax <- data_asv_tax %>% 
  distinct(host_ID, asv_core, Family) %>% 
  filter(!is.na(Family))

# known ASVs
asv_genus <- data_asv_tax %>% 
  distinct(asv_ID, Family) %>%
  mutate(name = is.na(Family)) 
  print(paste("Percentege of known ASVs:", round(table(asv_genus$name)[1]/nrow(asv_genus)*100, 2)))

[1] “Percentege of known ASVs: 90.72”

  cat('\n')
# how many Family?
print(paste("Number of families:", length(unique(data_tax$Family))))

[1] “Number of families: 55”

cat('\n')
# making unique samples (host_ID+asv_core)
data_tax_meta <- data_tax %>% 
  arrange(host_ID) %>% 
  unite(col="sample", host_ID:asv_core, remove = FALSE)
  
data_tax_meta2 <- data_tax_meta %>% distinct(sample, asv_core) 

data_tax_summary <- data_tax %>% 
  count(asv_core, Family) %>% 
  spread(Family, n, fill = 0)

# transforming to matrix
data_tax_mat <- data_tax_meta %>% 
  select(sample, Family) %>% 
  mutate(n=1) %>% 
  spread(Family, n, fill = 0) %>% 
  column_to_rownames("sample") %>% 
  as.matrix()

# Perform PERMANOVA
permanova_result <- adonis2(data_tax_mat ~ asv_core, data = data_tax_meta2, method = "jaccard", permutations = 999)
print(paste("F =", permanova_result$F[1]))

[1] “F = 188.830216433384”

cat('\n')
print(paste("p-value =", permanova_result$`Pr(>F)`[1]))

[1] “p-value = 0.001”

cat('\n')
# PCA
tax_pca <- prcomp(data_tax_mat)

# explained variance
ex_var <- tax_pca$sdev ^2 
prop_ex_var <- ex_var/sum(ex_var)*100

loadings <- as.data.frame(tax_pca$rotation[, 1:2]) %>% rownames_to_column("Family") %>% 
  mutate(PC1_abs = abs(PC1), PC2_abs = abs(PC2))

tax_scores <- as.data.frame(tax_pca$x[,1:2]) %>% rownames_to_column("sample") %>% 
  left_join(data_tax_meta2, by="sample") 

loading_top <- loadings %>% 
  slice_max(n = 5, order_by = PC1_abs) %>% 
  bind_rows(loadings %>% slice_max(n = 5, order_by = PC2_abs)) 

g2 <- tax_scores %>% 
  ggplot(aes(PC1, PC2, color=asv_core)) +
  geom_point() +
  geom_segment(data = loading_top, inherit.aes = FALSE, aes(x = 0, y = 0, xend = PC1*4, yend = PC2*4), 
                 arrow = arrow(length = unit(0.15, "cm"))) +
  annotate("label", x = (loading_top$PC1*4), y = (loading_top$PC2*4),
     label = loading_top$Family, size=3, label.size=0.1, label.padding=unit(0.05, "lines")) +
  scale_x_continuous(limits = c(-2, 2.7)) +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", size=1))+
  scale_fill_manual(values=group.colors) +
  labs(x = paste("PC1 (",round(prop_ex_var[1],2),"%)", sep = ""), y = paste("PC2 (",round(prop_ex_var[2],2),"%)", sep = ""), 
       title = "(B) Family", color = "ASV Group")

3.3 Figure

# plotting
g1 + g2 + plot_layout(guides='collect') &
  theme(legend.position='bottom')

cat('\n','\n')

4 Network exploration

# loop for three groups
for (i in 1:3) {
  
  cat('##',core_names[i],'{.tabset}','\n','\n')
  
  cat('### ASVs degree distribution','\n')
  
  print(asv_degree_distribution_three_groups[[i]])
  cat('\n','\n')
  
  # calculating connectance
  connectance_data <- modules_table_three_groups %>% 
    filter(asv_core == core_names[i])
  
  cat('No. of hosts: ', length(unique(connectance_data$host_ID)) ,'\n','\n')
  cat('No. of ASVs: ', length(unique(connectance_data$asv_ID)) ,'\n','\n')
  cat('Connectance: ', nrow(connectance_data) / (length(unique(connectance_data$host_ID)) * length(unique(connectance_data$asv_ID))) ,'\n','\n')
  
  
  cat('### Modules','\n')
  cat('The color indicates number of host individuals in the module / total number of hosts in the whole grid [%]','\n','\n')
  
  print(modules_three_groups[[i]])
  cat('\n','\n')
  
  cat('### Modules size','\n')
  print(modules_size_three_groups[[i]])
  cat('\n','\n')
  
  cat('### No. of land uses','\n')
  print(modules_grid_three_groups[[i]])
  cat('\n','\n')
  
}

4.1 Core

4.1.1 ASVs degree distribution

No. of hosts: 853

No. of ASVs: 103

Connectance: 0.3439147

4.1.2 Modules

The color indicates number of host individuals in the module / total number of hosts in the whole grid [%]

4.1.3 Modules size

4.1.4 No. of land uses

4.2 Non-core

4.2.1 ASVs degree distribution

No. of hosts: 855

No. of ASVs: 1188

Connectance: 0.06003308

4.2.2 Modules

The color indicates number of host individuals in the module / total number of hosts in the whole grid [%]

4.2.3 Modules size

4.2.4 No. of land uses

4.3 Rare

4.3.1 ASVs degree distribution

No. of hosts: 829

No. of ASVs: 660

Connectance: 0.0148006

4.3.2 Modules

The color indicates number of host individuals in the module / total number of hosts in the whole grid [%]

4.3.3 Modules size

4.3.4 No. of land uses

5 Modular Structure Exploration

5.1 Modules size

library(ggsignif)
#library(ggbreak)

modules_sizes <- modules_table_three_groups %>% 
  group_by(asv_core, host_group) %>% 
  summarise(n = n_distinct(host_ID)) 

# summary
cat('Summary','\n')

Summary

modules_size_summary <- modules_sizes %>% 
  group_by(asv_core) %>% 
  summarise(mean = mean(n), sd = sd(n))
print(knitr::kable(modules_size_summary))
asv_core mean sd
Core 77.545455 116.355802
Non-core 10.961538 45.098001
Rare 4.791907 2.617593
cat('\n')
# ANOVA between groups
cat('ANOVA','\n')

ANOVA

anova_result <- aov(n ~ asv_core, data = modules_sizes)
print(paste("F =", summary(anova_result)[[1]][["F value"]][1]))

[1] “F = 24.2614554816964”

cat('\n')
print(paste("p-value =", summary(anova_result)[[1]][["Pr(>F)"]][1]))

[1] “p-value = 2.19880229088941e-10”

cat('\n')
# Perform Tukey's HSD post hoc test
cat('Tukey post-hoc','\n')

Tukey post-hoc

tukey_result <- TukeyHSD(anova_result)
# Extract the test statistics (q values) from the Tukey HSD results
tukey_q <- tukey_result$asv_core[, "diff"] / tukey_result$asv_core[, "lwr"]
tukey_result <- cbind(tukey_result$asv_core, q_value = tukey_q) %>% 
  as.data.frame() %>% 
  rownames_to_column("comp")
print(knitr::kable(tukey_result))
comp diff lwr upr p adj q_value
Non-core-Core -66.583916 -92.12682 -41.041011 0.0000000 0.7227419
Rare-Core -72.753547 -97.41442 -48.092674 0.0000000 0.7468458
Rare-Non-core -6.169631 -16.98610 4.646836 0.3718674 0.3632165
cat('\n')
  g1 <- modules_sizes %>% 
  ggplot(aes(x=asv_core, y=n, fill=asv_core)) + 
  geom_boxplot(outliers = FALSE, alpha = 0.9) +
  geom_jitter(color="black", size=1) +
  theme_classic() +
  #scale_y_continuous(limits = c(0, 145)) +
  #scale_y_break(c(50,130), space=0.4, ticklabels = c(130,140))+
  geom_signif(comparisons = list(c("Core", "Non-core"),c("Core", "Rare"),c("Non-core", "Rare")), 
              annotations = c("***","***","ns"),  y_position = c(90,100,80), tip_length = 0) +
  geom_point(data = modules_size_summary, aes(x=asv_core, y = mean), position = position_dodge(width = 0.75), size = 2, color="#d62828",alpha=0.8, show.legend=F) +
  theme(axis.text = element_text(size = 11, color = 'black'), title = element_text(size = 15), legend.position = "none") +
  scale_fill_manual(values=group.colors) +
  labs(x="ASV Group", y="Module Size", title = "(A)")

5.2 Modules No. of Grids

modules_grids <- modules_table_three_groups %>% 
  group_by(asv_core, host_group) %>% 
  summarise(n = n_distinct(grid)) 

# summary
cat('Summary','\n')

Summary

modules_grids_summary <- modules_grids %>% 
  group_by(asv_core) %>% 
  summarise(mean = mean(n), sd = sd(n))
print(knitr::kable(modules_grids_summary))
asv_core mean sd
Core 6.000000 1.264911
Non-core 2.910256 1.722169
Rare 3.028902 1.193138
cat('\n')
# ANOVA between groups
cat('ANOVA','\n')

ANOVA

anova_result <- aov(n ~ asv_core, data = modules_grids)
print(paste("F =", summary(anova_result)[[1]][["F value"]][1]))

[1] “F = 25.439235906012”

cat('\n')
print(paste("p-value =", summary(anova_result)[[1]][["Pr(>F)"]][1]))

[1] “p-value = 8.18525908221536e-11”

cat('\n')
# Perform Tukey's HSD post hoc test
cat('Tukey post-hoc','\n')

Tukey post-hoc

tukey_result <- TukeyHSD(anova_result)
# Extract the test statistics (q values) from the Tukey HSD results
tukey_q <- tukey_result$asv_core[, "diff"] / tukey_result$asv_core[, "lwr"]
tukey_result <- cbind(tukey_result$asv_core, q_value = tukey_q) %>% 
  as.data.frame() %>% 
  rownames_to_column("comp")
print(knitr::kable(tukey_result))
comp diff lwr upr p adj q_value
Non-core-Core -3.0897436 -4.1331795 -2.0463077 0.0000000 0.7475464
Rare-Core -2.9710983 -3.9785029 -1.9636937 0.0000000 0.7467880
Rare-Non-core 0.1186453 -0.3232108 0.5605015 0.8021089 -0.3670834
cat('\n')
  g2 <- modules_grids %>% 
  ggplot(aes(x=asv_core, y=n, fill=asv_core)) + 
  geom_boxplot(outliers = FALSE, alpha = 0.9) +
  geom_jitter(color="black", size=1) +
  theme_classic() +
  scale_y_continuous(limits = c(0, 9), breaks = seq(0,8,by=2)) +
  geom_signif(comparisons = list(c("Core", "Non-core"),c("Core", "Rare"),c("Non-core", "Rare")), 
              annotations = c("***","***","ns"),  y_position = c(7.7,8.5,7), tip_length = 0) +
  geom_point(data = modules_grids_summary, aes(x=asv_core, y = mean), position = position_dodge(width = 0.75), size = 2, color="#d62828",alpha=0.8, show.legend=F) +
  theme(axis.text = element_text(size = 11, color = 'black'), title = element_text(size = 15), legend.position = "none") +
  scale_fill_manual(values=group.colors) +
  labs(x="ASV Group", y="No. of Land Uses", title = "(B)")

5.3 No. of Modules

modules_per_grid <- modules_table_three_groups %>% 
  group_by(asv_core, grid) %>% 
  summarise(n = n_distinct(host_group)) 

# summary
cat('Summary','\n')

Summary

modules_per_grid_summary <- modules_per_grid %>% 
  group_by(asv_core) %>% 
  summarise(mean = mean(n), sd = sd(n))
print(knitr::kable(modules_per_grid_summary))
asv_core mean sd
Core 9.428571 1.618347
Non-core 32.428571 13.339879
Rare 74.857143 29.390637
cat('\n')
# ANOVA between groups
cat('ANOVA','\n')

ANOVA

anova_result <- aov(n ~ asv_core, data = modules_per_grid)
print(paste("F =", summary(anova_result)[[1]][["F value"]][1]))

[1] “F = 22.152152106511”

cat('\n')
print(paste("p-value =", summary(anova_result)[[1]][["Pr(>F)"]][1]))

[1] “p-value = 1.40213584683407e-05”

cat('\n')
# Perform Tukey's HSD post hoc test
cat('Tukey post-hoc','\n')

Tukey post-hoc

tukey_result <- TukeyHSD(anova_result)
# Extract the test statistics (q values) from the Tukey HSD results
tukey_q <- tukey_result$asv_core[, "diff"] / tukey_result$asv_core[, "lwr"]
tukey_result <- cbind(tukey_result$asv_core, q_value = tukey_q) %>% 
  as.data.frame() %>% 
  rownames_to_column("comp")
print(knitr::kable(tukey_result))
comp diff lwr upr p adj q_value
Non-core-Core 23.00000 -2.453251 48.45325 0.0805762 -9.375316
Rare-Core 65.42857 39.975321 90.88182 0.0000105 1.636724
Rare-Non-core 42.42857 16.975321 67.88182 0.0013191 2.499427
cat('\n')
  g3 <- modules_per_grid %>% 
  ggplot(aes(x=asv_core, y=n, fill=asv_core)) + 
  geom_boxplot(outliers = FALSE, alpha = 0.9) +
  geom_jitter(color="black", size=1) +
  theme_classic() +
  scale_y_continuous(limits = c(0, 60)) +
  geom_signif(comparisons = list(c("Core", "Non-core"),c("Core", "Rare"),c("Non-core", "Rare")), 
              annotations = c("***","***","ns"),  y_position = c(45,55,50), tip_length = 0) +
  geom_point(data = modules_per_grid_summary, aes(x=asv_core, y = mean), position = position_dodge(width = 0.75), size = 2, color="#d62828",alpha=0.8, show.legend=F) +
  theme(axis.text = element_text(size = 11, color = 'black'), title = element_text(size = 15), legend.position = "none") +
  scale_fill_manual(values=group.colors) +
  labs(x="ASV Group", y="No. of Modules per Land Use", title = "(C)")

5.4 Figure

# plotting
g1+g2+g3 + plot_layout(axis_titles = "collect_x")

6 Phylogenetic analysis

# combining final results
assembly_final <- assembly_three_groups %>% 
  mutate(same_module = ifelse(host_group1==host_group2, "Same","Different"))


# renaming the process
assembly_final %<>% mutate(process = case_when(process=="Heterogeneous.Selection" ~ "Heterogeneous Selection",
                                               process=="Homogeneous.Selection" ~ "Homogeneous Selection",
                                               process=="Dispersal.Limitation" ~ "Dispersal Limitation",
                                               process=="Homogenizing.Dispersal" ~ "Homogenizing Dispersal",
                                               .default = "Drift"))

# summary
assembly_summary <- assembly_final %>% 
  count(asv_core, same_module, process)

assembly_summary_total <- assembly_summary %>% 
  group_by(asv_core) %>% 
  summarise(n_total = sum(n))

assembly_summary_total_module <- assembly_summary %>% 
  group_by(asv_core, same_module) %>% 
  summarise(n_total = sum(n))

#plotting process ratio between groups
g1 <- assembly_summary %>% 
  group_by(asv_core, process) %>% 
  summarise(n = sum(n)) %>% 
  left_join(assembly_summary_total, by=c("asv_core")) %>% 
  mutate(n_p = n/n_total) %>% 
  ggplot(aes(fill=process, x=asv_core, y=n_p)) +
  geom_bar(position="fill", stat="identity") +
  scale_y_continuous(labels = scales::percent) +
  geom_text(aes(label = paste0(round(n_p*100,1),"%")), 
            position = position_stack(vjust = 0.5), size = 3)+
  theme_bw() +
  theme(axis.text = element_text(size = 11, color = 'black', angle = 90, vjust = 0.5, hjust=1), title = element_text(size = 14),
        strip.text = element_text(size=12, color = 'black'), strip.background = element_rect(color = "grey80", size = 1), 
        panel.grid = element_blank(), panel.background = element_rect(colour = "black")) +
  scale_fill_manual(values=c("#adc178","#d6ccc2","#f07167","#83c5be")) +
  labs(x="ASVs Groups", y="Percentage")
print(g1)

#plotting process ratio between modules
g2 <- assembly_summary %>% 
  left_join(assembly_summary_total_module, by=c("asv_core","same_module")) %>% 
  mutate(n_p = n/n_total) %>% 
  ggplot(aes(fill=process, x=same_module, y=n_p)) +
  geom_bar(position="fill", stat="identity") +
  facet_wrap(~asv_core) +
  scale_y_continuous(labels = scales::percent) +
  geom_text(aes(label = paste0(round(n_p*100,1),"%")), 
            position = position_stack(vjust = 0.5), size = 3)+
  theme_bw() +
  theme(axis.text = element_text(size = 11, color = 'black', angle = 90, vjust = 0.5, hjust=1), title = element_text(size = 14),
        strip.text = element_text(size=12, color = 'black'), strip.background = element_rect(color = "grey80", size = 1), 
        panel.grid = element_blank(), panel.background = element_rect(colour = "black")) +
  scale_fill_manual(values=c("#adc178","#d6ccc2","#f07167","#83c5be")) +
  labs(x="", y="Percentage")
print(g2)

7 Normalized Mutual Information (NMI)

8 Modules similarity across land use change gradient

8.1 Correlations between variables

# reading grid similarity results
grids_similarity_attr <- read_csv("../../data/data_processed/village_summary_new.csv")
rownames(grids_similarity_attr) <- grids_similarity_attr$grid_village

# correlations between variables
print(psych::pairs.panels(grids_similarity_attr %>%  select(-grid_village,-village,-grid), ellipses = F, lm = F))

NULL

cat('\n','\n')
# RDA for modules

anova_model_all <- NULL
anova_rda_all <- NULL
score_modules_top_all <- NULL
rda_figs <- list()

# loop for three asv groups
for (i in 1:3) {
  
  modules_similarity2 <- modules_similarity2_three_groups[[i]]
  grid_names <- rownames(modules_similarity2)
  # filtering matrix to the existing grids
  grids_similarity_attr2 <- grids_similarity_attr %>%  filter(grid_village %in% grid_names & !grepl("village", grid_village))
  modules_similarity2 <- modules_similarity2[rownames(grids_similarity_attr2),]
  
  a=data_asv %>% group_by(village,grid) %>% summarise(n=n_distinct(host_ID))
  grids_similarity_attr2 %<>% left_join(a, by=c("village","grid"))
  
  # tb-RDA
  
  # hellinger transformation
  modules_similarity2_hell <- decostand (modules_similarity2, 'hell')
  
  
  rda_result <- rda(modules_similarity2 ~ veg_PC1+veg_PC2+dist_to_village+elevation+Condition(n), data=grids_similarity_attr2, na.action = na.omit)
  # **Condition(n) - control for the number of host in each village-grid.
  
  # variation explained
  constrained_eig <- t(as.data.frame(rda_result$CCA$eig/rda_result$tot.chi*100))  # for RDAs
# unconstrained_eig <- rda_result$CA$eig/rda_result$tot.chi*100
# expl_var <- c(constrained_eig, unconstrained_eig)
# barplot (expl_var[1:20], col = c(rep ('red', length (constrained_eig)), rep ('black', length (unconstrained_eig))),
#          las = 2, ylab = '% variation')
  
R2.obs <- RsquareAdj (rda_result)$adj.r.squared

# significance test for the whole model
  anova_model <- anova(rda_result)[1,] %>% 
    mutate(R2.adj = R2.obs, asv_core = core_names[i])
  
  anova_model_all <- rbind(anova_model_all, anova_model)
  
  # significance test for all variables
  anova_rda <- anova(rda_result, by = "margin", permutations = 999) %>% 
    mutate(asv_core = core_names[i])
  anova_rda$`Pr(>F)` <- p.adjust (anova_rda$`Pr(>F)`, method = 'holm')
  
  anova_rda_all <- rbind(anova_rda_all, anova_rda)
  
  # scores
  score_modules <- as.data.frame(vegan::scores(rda_result)$species) %>% 
    mutate(RDA1_abs = abs(RDA1), RDA2_abs = abs(RDA2))
  score_modules_top <- score_modules %>% 
  slice_max(n = 5, order_by = RDA1_abs) %>% 
  bind_rows(score_modules %>% slice_max(n = 5, order_by = RDA2_abs)) %>% 
    rownames_to_column("host_group") %>% 
    mutate(asv_core = core_names[i])
  
  score_modules_top_all <- rbind(score_modules_top_all, score_modules_top)
  
  # plotting
  # samples
  rda_samples <- as.data.frame(scores(rda_result)$sites) %>% 
    rownames_to_column("grid_village") %>% 
    left_join(grids_similarity_attr2[c("grid_village","village","grid")], by="grid_village")
  
  # variables
  rda_var <- as.data.frame(rda_result$CCA$biplot)
  
  # plot
  g <- rda_samples %>% 
    ggplot(aes(RDA1, RDA2, color=grid, shape=village)) +
    geom_hline(yintercept = 0, linetype = "dotted", color = "grey") +
  geom_vline(xintercept = 0, linetype = "dotted", color = "grey") +
  geom_point(size = 4, alpha=0.6) +
  geom_segment(data = rda_var, inherit.aes = FALSE, aes(x = 0, y = 0, xend = (RDA1*0.85), yend = (RDA2*0.85)), 
                 arrow = arrow(length = unit(0.15, "cm"))) +
    geom_text(data = rda_var,inherit.aes = FALSE, aes(x = RDA1, y = RDA2, label=rownames(rda_var)))+
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", size=1), panel.grid = element_blank())+
  labs(x = paste("RDA1 (",round(constrained_eig[1],2),"%)", sep = ""), y = paste("RDA2 (",round(constrained_eig[2],2),"%)", sep = ""), 
       title = paste(core_names[i]), color = "Land Use", shape = "Village")
  rda_figs <- append(rda_figs, list(g))
}

print(knitr::kable(anova_model_all))
Df Variance F Pr(>F) R2.adj asv_core
Model 4 8.153774 1.069403 0.372 0.0114267 Core
Model1 4 12.241210 1.458269 0.046 0.0976983 Non-core
Model2 4 13.915078 1.163366 0.110 0.0392719 Rare
cat('\n','\n')
print(knitr::kable(anova_rda_all))
Df Variance F Pr(>F) asv_core
veg_PC1 1 0.5655093 0.2966759 1.000 Core
veg_PC2 1 1.5189484 0.7968665 1.000 Core
dist_to_village 1 0.4230898 0.2219602 1.000 Core
elevation 1 4.4679061 2.3439405 0.112 Core
Residual 11 20.9676684 NA NA Core
veg_PC11 1 1.4783178 0.7044354 1.000 Non-core
veg_PC21 1 2.1010753 1.0011865 1.000 Non-core
dist_to_village1 1 1.9840607 0.9454277 1.000 Non-core
elevation1 1 7.0429163 3.3560304 0.004 Non-core
Residual1 11 23.0844391 NA NA Non-core
veg_PC12 1 2.3809244 0.7962259 1.000 Rare
veg_PC22 1 3.7684380 1.2602365 0.336 Rare
dist_to_village2 1 2.7102902 0.9063720 1.000 Rare
elevation2 1 5.7427517 1.9204841 0.004 Rare
Residual2 11 32.8928887 NA NA Rare
cat('\n','\n')
rda_figs[[1]] + rda_figs[[2]] + rda_figs[[3]] + plot_layout(guides='collect') &
  theme(legend.position='bottom')

cat('\n','\n')

8.2 Mantel test